package com.shujia.spark

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

/**
  * Demo of the RDD `sample` transformation followed by the `count` action.
  *
  * Reads a text file into an RDD, draws a random sample of its lines and
  * prints the number of lines in that sample.
  */
object Demo6Sample {

  /**
    * Entry point: builds a local Spark context, runs the sampling job and
    * prints the sampled line count to standard output.
    *
    * @param args command-line arguments (not used)
    */
  def main(args: Array[String]): Unit = {
    val conf: SparkConf = new SparkConf().setMaster("local[4]").setAppName("map")
    val sc: SparkContext = new SparkContext(conf)

    // Stop the context in a finally block so it is released even when the
    // job fails (for example when the input file is missing).
    try {
      val student: RDD[String] = sc.textFile("spark/data/students.txt")

      /**
        * sample: draw a random sample of the RDD.
        *
        * With `withReplacement = true` the same line may be picked more than
        * once. `fraction` is an expected sampling rate, not an exact one, so
        * the size of the result varies from run to run (no seed is fixed).
        */
      val rdd2: RDD[String] = student.sample(withReplacement = true, fraction = 0.1)

      /**
        * count: count the number of rows in the RDD.
        */
      val count: Long = rdd2.count()

      println(count)
    } finally {
      sc.stop()
    }
  }

}
